library(modplots)
##
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.3 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.1.3 ✔ stringr 1.4.0
## ✔ readr 1.4.0 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(Seurat)
## Attaching SeuratObject
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Mitochondrial genes
mt <- c("ENSGALG00000035334", #COX3
"ENSGALG00000032456", #COII
"ENSGALG00000032142", #MT-CO1
"ENSGALG00000032079", #MT-CYB
"ENSGALG00000037838", #ND6
"ENSGALG00000029500", #ND5
"ENSGALG00000036229", #MT-ND4
"ENSGALG00000042478", #ND4L
"ENSGALG00000030436", #ND3
"ENSGALG00000041091", #MT-ATP6
"ENSGALG00000032465", #MT-ATP8
"ENSGALG00000043768", #MT-ND2
"ENSGALG00000042750") #MT-ND1
# volcanoplot thresholds
p.adj <- 0.05
l2fc <- 0.5
# seurat object
my.se <- readRDS("~/spinal_cord_paper/data/Gg_ctrl_lumb_int_seurat_250723.rds")
# cluster labels from B10int and L10int
ctrl_lumb_int_combined_labels <- readRDS("~/spinal_cord_paper/annotations/ctrl_lumb_int_combined_labels.rds")
identical(colnames(my.se), rownames(ctrl_lumb_int_combined_labels))
## [1] TRUE
my.se$annot_sample <- ctrl_lumb_int_combined_labels$annot_sample
my.se@active.assay <- "RNA"
We do DE analysis per cluster, contrasting B10 and L10 samples:
markers <- list()
numbers <- list()
composition <-list()
for (i in seq(levels(Idents(my.se)))) {
# subset for individual clusters
mn.se <- subset(x = my.se, idents = levels(Idents(my.se))[i])
mn.se$sample <- str_extract(mn.se$orig.ident, "ctrl|lumb")
composition[[i]] <- mn.se[[]] %>%
select(sample, annot_sample)
Idents(mn.se) <- "sample"
tmp_markers <- FindMarkers(mn.se,
ident.1 = "ctrl",
only.pos = FALSE,
min.pct = 0.25,
logfc.threshold = 0.2,
latent.vars = c("CC.Difference.seurat"),
test.use = "MAST",
assay = "RNA")
# cell numbers per sample
numbers[[i]] <- data.frame(table(mn.se$sample))
tmp_markers <- tmp_markers %>%
rownames_to_column("Gene.stable.ID") %>%
left_join(gnames)
markers[[i]] <- tmp_markers
}
names(markers) <- paste0("cl-", levels(Idents(my.se)))
names(numbers) <- paste0("cl-", levels(Idents(my.se)))
names(composition) <- paste0("cl-", levels(Idents(my.se)))
# bind lists into data frames
lumb_markers <- bind_rows(markers, .id = "cluster") %>%
mutate(cluster = factor(cluster, levels = paste0("cl-", levels(Idents(my.se)))))
lumb_numbers <- bind_rows(numbers, .id = "cluster") %>%
mutate(cluster = factor(cluster, levels = paste0("cl-", levels(Idents(my.se)))))
lumb_composition <- bind_rows(composition, .id = "cluster") %>%
mutate(cluster = factor(cluster, levels = paste0("cl-", levels(Idents(my.se)))))
saveRDS(lumb_markers, "~/spinal_cord_paper/data/Gg_ctrl_lumb_int_markers.rds")
saveRDS(lumb_numbers, "~/spinal_cord_paper/data/Gg_ctrl_lumb_int_numbers.rds")
saveRDS(lumb_composition, "~/spinal_cord_paper/data/Gg_ctrl_lumb_int_composition.rds")
# load the DE data
lumb_markers <- readRDS("~/spinal_cord_paper/data/Gg_ctrl_lumb_int_markers.rds") %>%
mutate(clust_id = str_remove(cluster, "^cl-"))
lumb_numbers <- readRDS("~/spinal_cord_paper/data/Gg_ctrl_lumb_int_numbers.rds") %>%
mutate(clust_id = str_remove(cluster, "^cl-"))
lumb_composition <- readRDS("~/spinal_cord_paper/data/Gg_ctrl_lumb_int_composition.rds") %>%
mutate(clust_id = str_remove(cluster, "^cl-"))
Plot the cluster compositions and the number of marker genes.
DimPlot(my.se, label = TRUE, reduction = "tsne")
ggplot(data = lumb_numbers, aes(x = Var1, y = Freq, group = cluster, label = Freq)) +
geom_col() +
facet_wrap("cluster", nrow = 3) +
geom_text(nudge_y = 50, size = 3) +
ggtitle("Cluster composition by sample (nCells)")
ggplot(data = lumb_markers %>% filter(p_val_adj < 0.05 & avg_log2FC > 0.5), aes(x = cluster, group = cluster)) +
geom_bar() +
facet_wrap("cluster", nrow = 3,scales = "free_x") +
ggtitle("Number of sig. DE genes")
gnames[grepl("TUBB3",gnames$Gene.name),]
# TUBB3 expression
VlnPlot(my.se, features = c("ENSGALG00000000059"), pt.size = 0)
# vector of neuronal clusters
neuron_clusters <- c(3,7,17,20,27,28,30)
neuron_lumb_markers <- filter(lumb_markers, clust_id %in% neuron_clusters)
neuron_lumb_numbers <- filter(lumb_numbers, clust_id %in% neuron_clusters)
neuron_lumb_composition <- filter(lumb_composition, clust_id %in% neuron_clusters)
Barplots showing number of cells from B10 or L10, and the contributions from the individual B10int and L10int clusters:
ggplot(data = neuron_lumb_numbers, aes(x = Var1, y = Freq, group = cluster, label = Freq)) +
geom_col() +
facet_wrap("cluster", nrow = 1) +
geom_text(nudge_y = 10) +
ggtitle("Cluster composition by sample (nCells)")
toplot <- neuron_lumb_composition %>%
mutate(annot_sample = str_extract(annot_sample, "\\d{1,2}_.{1}")) %>%
mutate(annot_sample = factor(annot_sample)) %>%
group_by(sample, cluster) %>%
count(annot_sample) %>%
mutate(label_ypos = cumsum(n)- 0.5*n)
BL_neuron_barplot <- ggplot(data=toplot, aes(x=sample, y=n, fill=fct_rev(annot_sample))) +
geom_bar(stat="identity", color = "black") +
geom_text(aes(y=label_ypos, label=annot_sample),
color="black", size=3.5) +
facet_wrap("cluster", nrow = 1) +
theme(legend.position="none") +
ggtitle("From which clusters do cells come?")
BL_neuron_barplot
toplot <- neuron_lumb_markers %>%
mutate(gene_type = case_when(
avg_log2FC >= 0.5 & p_val_adj <= 0.05 ~ "ctrl",
avg_log2FC <= -0.5 & p_val_adj <= 0.05 ~ "lumb",
TRUE ~ "ns")
)
cols <- c(ctrl = "black",
lumb = "#419c73",
ns = "grey")
shapes <- c(ctrl = 21,
lumb = 21,
ns = 20)
volplot <- ggplot(data = toplot,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 2, scales = "free") +
ylab("-log10(padj)") +
theme_bw()+
ggtitle("Sig. marker genes")
ggplotly(volplot, width = 1000, height = 600)
Plots without HOX and mitochondrial genes:
# filter out HOX and MT genes
toplot_nomt <- toplot %>%
filter(!grepl("^HOX", Gene.name)) %>%
filter(!Gene.stable.ID %in% mt)
volplot_nomt <- ggplot(data = toplot_nomt,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 2, scales = "free") +
ylab("-log10(padj)") +
theme_bw()+
ggtitle("Sig. marker genes (without MT and HOX genes")
ggplotly(volplot_nomt, width = 1000, height = 600)
volplot_nomt +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
## Warning: Removed 6096 rows containing missing values (geom_text_repel).
## Warning: ggrepel: 45 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
pdf("~/spinal_cord_paper/figures/Fig_4_neuron_volcano.pdf", width = 9, height = 7)
volplot_nomt +
NoLegend() +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
## Warning: Removed 6096 rows containing missing values (geom_text_repel).
## Warning: ggrepel: 43 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
BL_neuron_barplot
dev.off()
## png
## 2
Cluster 30 (Consisting of 161 B10 vs 13 L10 cells) shows no MN related markers. Seemingly, those 13 cells are indeed motor neurons. This is supported by the fact of their expression of TUBB3, FOXP1, and SLC18A3.
# Motor neuron cluster
mn.se <- subset(x = my.se, idents = 30)
mn.se$sample <- str_extract(mn.se$orig.ident, "ctrl|lumb")
mn.se@active.assay <- "integrated"
mn_markers <- gnames[grepl("TUBB3|FOXP1|SLC18A3", gnames$Gene.name),]
mn_markers
VlnPlot(mn.se, group.by = "sample", mn_markers$Gene.stable.ID, cols = c("darkgrey", "#419c73"))
mn_DE <- gnames[grepl("PRIMA1|NCAM1|^SST$|PBX3", gnames$Gene.name),]
mn_DE
VlnPlot(mn.se, group.by = "sample", mn_DE$Gene.stable.ID, cols = c("darkgrey", "#419c73"))
gnames[grepl("^PLP1$|^MBP$|^FABP9$",gnames$Gene.name),]
# MFOL marker expression
VlnPlot(my.se, features = c("ENSGALG00000000112","ENSGALG00000013640","ENSGALG00000032453"), pt.size = 0, ncol = 1)
# vector of neuronal clusters
MFOL_clusters <- c(16, 18, 34)
MFOL_lumb_markers <- filter(lumb_markers, clust_id %in% MFOL_clusters)
MFOL_lumb_numbers <- filter(lumb_numbers, clust_id %in% MFOL_clusters)
MFOL_lumb_composition <- filter(lumb_composition, clust_id %in% MFOL_clusters)
Barplots showing number of cells from B10 or L10, and the contributions from the individual B10int and L10int clusters:
ggplot(data = MFOL_lumb_numbers, aes(x = Var1, y = Freq, group = cluster, label = Freq)) +
geom_col() +
facet_wrap("cluster", nrow = 1) +
geom_text(nudge_y = 10) +
ggtitle("Cluster composition by sample (nCells)")
toplot <- MFOL_lumb_composition %>%
mutate(annot_sample = str_extract(annot_sample, "\\d{1,2}_.{1}")) %>%
mutate(annot_sample = factor(annot_sample)) %>%
group_by(sample, cluster) %>%
count(annot_sample) %>%
mutate(label_ypos = cumsum(n)- 0.5*n)
BL_MFOL_barplot <- ggplot(data=toplot, aes(x=sample, y=n, fill=fct_rev(annot_sample))) +
geom_bar(stat="identity", color = "black") +
geom_text(aes(y=label_ypos, label=annot_sample),
color="black", size=3.5) +
facet_wrap("cluster", nrow = 1) +
theme(legend.position="none") +
ggtitle("From which clusters do cells come?")
BL_MFOL_barplot
toplot <- MFOL_lumb_markers %>%
mutate(gene_type = case_when(
avg_log2FC >= 0.5 & p_val_adj <= 0.05 ~ "ctrl",
avg_log2FC <= -0.5 & p_val_adj <= 0.05 ~ "lumb",
TRUE ~ "ns")
)
cols <- c(ctrl = "black",
lumb = "#419c73",
ns = "grey")
shapes <- c(ctrl = 21,
lumb = 21,
ns = 20)
volplot <- ggplot(data = toplot,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 1, scales = "free") +
ylab("-log10(padj)") +
theme_bw()+
ggtitle("Sig. marker genes")
ggplotly(volplot, width = 800, height = 500)
Plots without HOX and mitochondrial genes:
# filter out HOX and MT genes
toplot_nomt <- toplot %>%
filter(!grepl("^HOX", Gene.name)) %>%
filter(!Gene.stable.ID %in% mt)
volplot_nomt <- ggplot(data = toplot_nomt,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 1, scales = "free") +
ylab("-log10(padj)") +
theme_bw()+
ggtitle("Sig. marker genes (without MT and HOX genes")
ggplotly(volplot_nomt, width = 800, height = 500)
volplot_nomt +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
## Warning: Removed 2944 rows containing missing values (geom_text_repel).
pdf("~/spinal_cord_paper/figures/Fig_4_MFOL_volcano.pdf", width = 7, height = 3.5)
volplot_nomt +
NoLegend() +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
## Warning: Removed 2944 rows containing missing values (geom_text_repel).
BL_MFOL_barplot
dev.off()
## png
## 2
# seurat object
my.se <- readRDS("~/spinal_cord_paper/data/Gg_ctrl_poly_int_seurat_250723.rds")
# cluster labels from B10int and L10int
ctrl_poly_int_combined_labels <- readRDS("~/spinal_cord_paper/annotations/ctrl_poly_int_combined_labels.rds")
identical(colnames(my.se), rownames(ctrl_poly_int_combined_labels))
## [1] TRUE
my.se$annot_sample <- ctrl_poly_int_combined_labels$annot_sample
my.se@active.assay <- "RNA"
We do DE analysis per cluster, contrasting B10 and P10 samples:
markers <- list()
numbers <- list()
composition <- list()
for (i in seq(levels(Idents(my.se)))) {
# subset for individual clusters
mn.se <- subset(x = my.se, idents = levels(Idents(my.se))[i])
mn.se$sample <- str_extract(mn.se$orig.ident, "ctrl|poly")
composition[[i]] <- mn.se[[]] %>%
select(sample, annot_sample)
Idents(mn.se) <- "sample"
tmp_markers <- FindMarkers(mn.se,
ident.1 = "ctrl",
only.pos = FALSE,
min.pct = 0.25,
logfc.threshold = 0.2,
latent.vars = c("CC.Difference.seurat"),
test.use = "MAST",
assay = "RNA")
# cell numbers per sample
numbers[[i]] <- data.frame(table(mn.se$sample))
tmp_markers <- tmp_markers %>%
rownames_to_column("Gene.stable.ID") %>%
left_join(gnames)
markers[[i]] <- tmp_markers
}
names(markers) <- paste0("cl-", levels(Idents(my.se)))
names(numbers) <- paste0("cl-", levels(Idents(my.se)))
names(composition) <- paste0("cl-", levels(Idents(my.se)))
# bind lists into data frames
poly_markers <- bind_rows(markers, .id = "cluster") %>%
mutate(cluster = factor(cluster, levels = paste0("cl-", levels(Idents(my.se)))))
poly_numbers <- bind_rows(numbers, .id = "cluster") %>%
mutate(cluster = factor(cluster, levels = paste0("cl-", levels(Idents(my.se)))))
poly_composition <- bind_rows(composition, .id = "cluster") %>%
mutate(cluster = factor(cluster, levels = paste0("cl-", levels(Idents(my.se)))))
saveRDS(poly_markers, "~/spinal_cord_paper/data/Gg_ctrl_poly_int_markers.rds")
saveRDS(poly_numbers, "~/spinal_cord_paper/data/Gg_ctrl_poly_int_numbers.rds")
saveRDS(poly_composition, "~/spinal_cord_paper/data/Gg_ctrl_poly_int_composition.rds")
# load the DE data
poly_markers <- readRDS("~/spinal_cord_paper/data/Gg_ctrl_poly_int_markers.rds") %>%
mutate(clust_id = str_remove(cluster, "^cl-"))
poly_numbers <- readRDS("~/spinal_cord_paper/data/Gg_ctrl_poly_int_numbers.rds") %>%
mutate(clust_id = str_remove(cluster, "^cl-"))
poly_composition <- readRDS("~/spinal_cord_paper/data/Gg_ctrl_poly_int_composition.rds") %>%
mutate(clust_id = str_remove(cluster, "^cl-"))
Plot the cluster compositions and the number of marker genes.
DimPlot(my.se, label = TRUE, reduction = "tsne")
ggplot(data = poly_numbers, aes(x = Var1, y = Freq, group = cluster, label = Freq)) +
geom_col() +
facet_wrap("cluster", nrow = 3) +
geom_text(nudge_y = 50, size = 3) +
ggtitle("Cluster composition by sample (nCells)")
ggplot(data = poly_markers %>% filter(p_val_adj < 0.05 & avg_log2FC > 0.5), aes(x = cluster, group = cluster)) +
geom_bar() +
facet_wrap("cluster", nrow = 3,scales = "free_x") +
ggtitle("Number of sig. DE genes")
gnames[grepl("TUBB3",gnames$Gene.name),]
# TUBB3 expression
VlnPlot(my.se, features = c("ENSGALG00000000059"), pt.size = 0)
# vector of neuronal clusters
neuron_clusters <- c(5,6,11,20,25,27,29,30,32)
neuron_poly_markers <- filter(poly_markers, clust_id %in% neuron_clusters)
neuron_poly_numbers <- filter(poly_numbers, clust_id %in% neuron_clusters)
neuron_poly_composition <- filter(poly_composition, clust_id %in% neuron_clusters)
Barplots showing number of cells from B10 or P10, and the contributions from the individual B10int and P10int clusters:
ggplot(data = neuron_poly_numbers, aes(x = Var1, y = Freq, group = cluster, label = Freq)) +
geom_col() +
facet_wrap("cluster", nrow = 1) +
geom_text(nudge_y = 10) +
ggtitle("Cluster composition by sample (nCells)")
toplot <- neuron_poly_composition %>%
mutate(annot_sample = str_extract(annot_sample, "\\d{1,2}_.{1}")) %>%
mutate(annot_sample = factor(annot_sample)) %>%
group_by(sample, cluster) %>%
count(annot_sample) %>%
mutate(label_ypos = cumsum(n)- 0.5*n)
BP_neuron_barplot <- ggplot(data=toplot, aes(x=sample, y=n, fill=fct_rev(annot_sample))) +
geom_bar(stat="identity", color = "black") +
geom_text(aes(y=label_ypos, label=annot_sample),
color="black", size=3.5) +
facet_wrap("cluster", nrow = 1) +
theme(legend.position="none") +
ggtitle("From which clusters do cells come?")
BP_neuron_barplot
toplot <- neuron_poly_markers %>%
mutate(gene_type = case_when(
avg_log2FC >= 0.5 & p_val_adj <= 0.05 ~ "ctrl",
avg_log2FC <= -0.5 & p_val_adj <= 0.05 ~ "poly",
TRUE ~ "ns")
)
cols <- c(ctrl = "black",
poly = "goldenrod3",
ns = "grey")
shapes <- c(ctrl = 21,
poly = 21,
ns = 20)
volplot <- ggplot(data = toplot,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 2, scales = "free") +
ylab("-log10(padj)") +
theme_bw()+
ggtitle("Sig. marker genes")
ggplotly(volplot, width = 1000, height = 600)
Plots without mitochondrial genes:
toplot_nomt <- toplot %>%
filter(!Gene.stable.ID %in% mt)
volplot_nomt <- ggplot(data = toplot_nomt,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 2, scales = "free") +
ylab("-log10(padj)") +
theme_bw()+
ggtitle("Sig. marker genes (without MT and HOX genes")
ggplotly(volplot_nomt)
volplot_nomt +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
## Warning: Removed 6087 rows containing missing values (geom_text_repel).
pdf("~/spinal_cord_paper/figures/Fig_5_neuron_volcano.pdf", width = 11, height = 7)
volplot_nomt +
NoLegend() +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
## Warning: Removed 6087 rows containing missing values (geom_text_repel).
BP_neuron_barplot
dev.off()
## png
## 2
Single plot (no faceting due to low number of DE genes).
volplot_nomt_ind <- ggplot(data = toplot_nomt,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = cluster,
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = c("black", "grey", "goldenrod3"))+
scale_shape_manual(values = c(0, 16, 2, 3, 5, 16, 16, 16, 16)) +
xlim(c(-1,1)) +
ylab("-log10(padj)") +
theme_bw() +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
volplot_nomt_ind
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 6087 rows containing missing values (geom_text_repel).
gnames[grepl("^PLP1$|^MBP$|^FABP9$",gnames$Gene.name),]
# MFOL marker expression
VlnPlot(my.se, features = c("ENSGALG00000000112","ENSGALG00000013640","ENSGALG00000032453"), pt.size = 0, ncol = 1)
# vector of neuronal clusters
MFOL_clusters <- c(10, 24)
MFOL_poly_markers <- filter(poly_markers, clust_id %in% MFOL_clusters)
MFOL_poly_numbers <- filter(poly_numbers, clust_id %in% MFOL_clusters)
MFOL_poly_composition <- filter(poly_composition, clust_id %in% MFOL_clusters)
Barplots showing number of cells from B10 or L10, and the contributions from the individual B10int and L10int clusters:
ggplot(data = MFOL_poly_numbers, aes(x = Var1, y = Freq, group = cluster, label = Freq)) +
geom_col() +
facet_wrap("cluster", nrow = 1) +
geom_text(nudge_y = 10) +
ggtitle("Cluster composition by sample (nCells)")
toplot <- MFOL_poly_composition %>%
mutate(annot_sample = str_extract(annot_sample, "\\d{1,2}_.{1}")) %>%
mutate(annot_sample = factor(annot_sample)) %>%
group_by(sample, cluster) %>%
count(annot_sample) %>%
mutate(label_ypos = cumsum(n)- 0.5*n)
BP_MFOL_barplot <- ggplot(data=toplot, aes(x=sample, y=n, fill=fct_rev(annot_sample))) +
geom_bar(stat="identity", color = "black") +
geom_text(aes(y=label_ypos, label=annot_sample),
color="black", size=3.5) +
facet_wrap("cluster", nrow = 1) +
theme(legend.position="none") +
ggtitle("From which clusters do cells come?")
BP_MFOL_barplot
toplot <- MFOL_poly_markers %>%
mutate(gene_type = case_when(
avg_log2FC >= 0.5 & p_val_adj <= 0.05 ~ "ctrl",
avg_log2FC <= -0.5 & p_val_adj <= 0.05 ~ "poly",
TRUE ~ "ns")
)
cols <- c(ctrl = "black",
poly = "goldenrod3",
ns = "grey")
shapes <- c(ctrl = 21,
poly = 21,
ns = 20)
volplot <- ggplot(data = toplot,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 1, scales = "free") +
ylab("-log10(padj)") +
theme_bw()+
ggtitle("Sig. marker genes")
ggplotly(volplot, width = 800, height = 500)
Plots without HOX and mitochondrial genes:
# filter out HOX and MT genes
toplot_nomt <- toplot %>%
filter(!grepl("^HOX", Gene.name)) %>%
filter(!Gene.stable.ID %in% mt)
volplot_nomt <- ggplot(data = toplot_nomt,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 1, scales = "free") +
ylab("-log10(padj)") +
theme_bw()+
ggtitle("Sig. marker genes (without MT and HOX genes")
ggplotly(volplot_nomt, width = 800, height = 500)
volplot_nomt +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
## Warning: Removed 406 rows containing missing values (geom_text_repel).
pdf("~/spinal_cord_paper/figures/Fig_5_MFOL_volcano.pdf", width = 5, height = 3.5)
volplot_nomt +
NoLegend() +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
## Warning: Removed 406 rows containing missing values (geom_text_repel).
BP_MFOL_barplot
dev.off()
## png
## 2
gnames[grepl("^RSPO1$|^SHH$",gnames$Gene.name),]
# MFOL marker expression
VlnPlot(my.se, features = c("ENSGALG00000001946","ENSGALG00000006379"), pt.size = 0, ncol = 1)
# vector of neuronal clusters
RPFP_clusters <- c(22, 23)
RPFP_poly_markers <- filter(poly_markers, clust_id %in% RPFP_clusters)
RPFP_poly_numbers <- filter(poly_numbers, clust_id %in% RPFP_clusters)
RPFP_poly_composition <- filter(poly_composition, clust_id %in% RPFP_clusters)
Barplots showing number of cells from B10 or L10, and the contributions from the individual B10int and L10int clusters:
ggplot(data = RPFP_poly_numbers, aes(x = Var1, y = Freq, group = cluster, label = Freq)) +
geom_col() +
facet_wrap("cluster", nrow = 1) +
geom_text(nudge_y = 10) +
ggtitle("Cluster composition by sample (nCells)")
toplot <- RPFP_poly_composition %>%
mutate(annot_sample = str_extract(annot_sample, "\\d{1,2}_.{1}")) %>%
mutate(annot_sample = factor(annot_sample)) %>%
group_by(sample, cluster) %>%
count(annot_sample) %>%
mutate(label_ypos = cumsum(n)- 0.5*n)
BP_RPFP_barplot <- ggplot(data=toplot, aes(x=sample, y=n, fill=fct_rev(annot_sample))) +
geom_bar(stat="identity", color = "black") +
geom_text(aes(y=label_ypos, label=annot_sample),
color="black", size=3.5) +
facet_wrap("cluster", nrow = 1) +
theme(legend.position="none") +
ggtitle("From which clusters do cells come?")
BP_RPFP_barplot
toplot <- RPFP_poly_markers %>%
mutate(gene_type = case_when(
avg_log2FC >= 0.5 & p_val_adj <= 0.05 ~ "ctrl",
avg_log2FC <= -0.5 & p_val_adj <= 0.05 ~ "poly",
TRUE ~ "ns")
)
cols <- c(ctrl = "black",
poly = "goldenrod3",
ns = "grey")
shapes <- c(ctrl = 21,
poly = 21,
ns = 20)
volplot <- ggplot(data = toplot,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 1, scales = "free") +
ylab("-log10(padj)") +
theme_bw()+
ggtitle("Sig. marker genes")
ggplotly(volplot, width = 800, height = 500)
Plots without HOX and mitochondrial genes:
# filter out HOX and MT genes
toplot_nomt <- toplot %>%
filter(!grepl("^HOX", Gene.name)) %>%
filter(!Gene.stable.ID %in% mt)
volplot_nomt <- ggplot(data = toplot_nomt,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 1, scales = "free") +
ylab("-log10(padj)") +
theme_bw()+
ggtitle("Sig. marker genes (without MT and HOX genes")
ggplotly(volplot_nomt, width = 800, height = 500)
volplot_nomt +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
## Warning: Removed 2106 rows containing missing values (geom_text_repel).
## Warning: ggrepel: 18 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
pdf("~/spinal_cord_paper/figures/Fig_5_RPFP_volcano.pdf", width = 5, height = 3.5)
volplot_nomt +
NoLegend() +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
## Warning: Removed 2106 rows containing missing values (geom_text_repel).
## Warning: ggrepel: 34 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
BP_RPFP_barplot
dev.off()
## png
## 2
Clusters 16 and 18 show the highest numbers of DE genes. They are a progenitor population
# vector of neuronal clusters
NPC_clusters <- c(16, 18)
NPC_poly_markers <- filter(poly_markers, clust_id %in% NPC_clusters)
NPC_poly_numbers <- filter(poly_numbers, clust_id %in% NPC_clusters)
NPC_poly_composition <- filter(poly_composition, clust_id %in% NPC_clusters)
Barplots showing number of cells from B10 or L10, and the contributions from the individual B10int and L10int clusters:
ggplot(data = NPC_poly_numbers, aes(x = Var1, y = Freq, group = cluster, label = Freq)) +
geom_col() +
facet_wrap("cluster", nrow = 1) +
geom_text(nudge_y = 10) +
ggtitle("Cluster composition by sample (nCells)")
toplot <- NPC_poly_composition %>%
mutate(annot_sample = str_extract(annot_sample, "\\d{1,2}_.{1}")) %>%
mutate(annot_sample = factor(annot_sample)) %>%
group_by(sample, cluster) %>%
count(annot_sample) %>%
mutate(label_ypos = cumsum(n)- 0.5*n)
BP_NPC_barplot <-ggplot(data=toplot, aes(x=sample, y=n, fill=fct_rev(annot_sample))) +
geom_bar(stat="identity", color = "black") +
geom_text(aes(y=label_ypos, label=annot_sample),
color="black", size=3.5) +
facet_wrap("cluster", nrow = 1) +
theme(legend.position="none") +
ggtitle("From which clusters do cells come?")
BP_NPC_barplot
toplot <- NPC_poly_markers %>%
mutate(gene_type = case_when(
avg_log2FC >= 0.5 & p_val_adj <= 0.05 ~ "ctrl",
avg_log2FC <= -0.5 & p_val_adj <= 0.05 ~ "poly",
TRUE ~ "ns")
)
cols <- c(ctrl = "black",
poly = "goldenrod3",
ns = "grey")
shapes <- c(ctrl = 21,
poly = 21,
ns = 20)
volplot <- ggplot(data = toplot,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 1, scales = "free") +
ylab("-log10(padj)") +
theme_bw()+
ggtitle("Sig. marker genes")
ggplotly(volplot, width = 800, height = 500)
Plots without HOX and mitochondrial genes:
# filter out HOX and MT genes
toplot_nomt <- toplot %>%
filter(!grepl("^HOX", Gene.name)) %>%
filter(!Gene.stable.ID %in% mt)
volplot_nomt <- ggplot(data = toplot_nomt,
aes(x = avg_log2FC,
y = -log10(p_val_adj),
label = Gene.name,
color = gene_type,
shape = gene_type
)) +
geom_point() +
geom_hline(yintercept = -log10(p.adj), linetype = "dashed") +
geom_vline(xintercept = c(-l2fc,l2fc), linetype = "dashed") +
scale_color_manual(values = cols) +
scale_shape_manual(values = shapes) +
facet_wrap("cluster", nrow = 1, scales = "free") +
ylab("-log10(padj)") +
theme_bw()+
ggtitle("Sig. marker genes (without MT and HOX genes")
ggplotly(volplot_nomt, width = 800, height = 500)
volplot_nomt +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
## Warning: Removed 1899 rows containing missing values (geom_text_repel).
## Warning: ggrepel: 40 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 134 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
pdf("~/spinal_cord_paper/figures/Fig_5_NPC_volcano.pdf", width = 5, height = 3.5)
volplot_nomt +
NoLegend() +
ggrepel::geom_text_repel(aes(label = ifelse(gene_type == 'ns',
NA,
Gene.name)),
size = 3,
color = "black")
## Warning: Removed 1899 rows containing missing values (geom_text_repel).
## Warning: ggrepel: 48 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 146 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
BP_NPC_barplot
dev.off()
## png
## 2
# Date and time of Rendering
Sys.time()
## [1] "2024-09-10 15:36:09 CEST"
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: /scicore/soft/apps/OpenBLAS/0.3.1-GCC-7.3.0-2.30/lib/libopenblas_sandybridgep-r0.3.1.so
##
## locale:
## [1] en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] plotly_4.10.0 SeuratObject_4.0.2 Seurat_4.0.5 forcats_0.5.1
## [5] stringr_1.4.0 dplyr_1.0.10 purrr_0.3.4 readr_1.4.0
## [9] tidyr_1.1.3 tibble_3.1.8 ggplot2_3.3.3 tidyverse_1.3.1
## [13] modplots_1.0.0
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1
## [3] plyr_1.8.6 igraph_1.2.6
## [5] lazyeval_0.2.2 sp_1.4-5
## [7] splines_4.1.0 crosstalk_1.1.1
## [9] listenv_0.8.0 scattermore_0.7
## [11] GenomeInfoDb_1.28.0 digest_0.6.27
## [13] htmltools_0.5.1.1 fansi_0.5.0
## [15] magrittr_2.0.1 memoise_2.0.0
## [17] tensor_1.5 cluster_2.1.2
## [19] ROCR_1.0-11 globals_0.16.2
## [21] Biostrings_2.60.0 modelr_0.1.8
## [23] matrixStats_0.58.0 spatstat.sparse_3.0-0
## [25] colorspace_2.0-1 rvest_1.0.2
## [27] blob_1.2.1 ggrepel_0.9.1
## [29] haven_2.4.1 xfun_0.34
## [31] crayon_1.4.1 RCurl_1.98-1.3
## [33] jsonlite_1.7.2 spatstat.data_3.0-0
## [35] survival_3.2-11 zoo_1.8-9
## [37] glue_1.6.2 polyclip_1.10-0
## [39] gtable_0.3.0 zlibbioc_1.38.0
## [41] XVector_0.32.0 leiden_0.3.9
## [43] DelayedArray_0.18.0 future.apply_1.7.0
## [45] BiocGenerics_0.38.0 abind_1.4-5
## [47] scales_1.1.1 pheatmap_1.0.12
## [49] DBI_1.1.1 miniUI_0.1.1.1
## [51] Rcpp_1.0.7 viridisLite_0.4.0
## [53] xtable_1.8-4 reticulate_1.22
## [55] spatstat.core_2.1-2 bit_4.0.4
## [57] stats4_4.1.0 htmlwidgets_1.5.3
## [59] httr_1.4.2 RColorBrewer_1.1-2
## [61] ellipsis_0.3.2 ica_1.0-2
## [63] farver_2.1.0 pkgconfig_2.0.3
## [65] dbplyr_2.1.1 sass_0.4.0
## [67] uwot_0.1.10 deldir_1.0-6
## [69] utf8_1.2.1 labeling_0.4.2
## [71] tidyselect_1.2.0 rlang_1.0.6
## [73] reshape2_1.4.4 later_1.2.0
## [75] AnnotationDbi_1.54.0 cellranger_1.1.0
## [77] munsell_0.5.0 tools_4.1.0
## [79] cachem_1.0.5 cli_3.4.1
## [81] generics_0.1.3 RSQLite_2.2.7
## [83] broom_0.7.6 ggridges_0.5.3
## [85] org.Gg.eg.db_3.13.0 evaluate_0.20
## [87] fastmap_1.1.0 yaml_2.2.1
## [89] goftest_1.2-2 fs_1.5.0
## [91] knitr_1.41 bit64_4.0.5
## [93] fitdistrplus_1.1-6 RANN_2.6.1
## [95] KEGGREST_1.32.0 pbapply_1.4-3
## [97] future_1.30.0 nlme_3.1-152
## [99] mime_0.10 xml2_1.3.2
## [101] compiler_4.1.0 rstudioapi_0.13
## [103] png_0.1-7 spatstat.utils_3.0-1
## [105] reprex_2.0.1 bslib_0.2.5.1
## [107] stringi_1.6.2 highr_0.9
## [109] lattice_0.20-44 Matrix_1.3-3
## [111] vctrs_0.5.1 pillar_1.8.1
## [113] lifecycle_1.0.3 spatstat.geom_3.0-3
## [115] lmtest_0.9-38 jquerylib_0.1.4
## [117] RcppAnnoy_0.0.19 data.table_1.14.0
## [119] cowplot_1.1.1 bitops_1.0-7
## [121] irlba_2.3.3 GenomicRanges_1.44.0
## [123] httpuv_1.6.1 patchwork_1.1.1
## [125] R6_2.5.0 promises_1.2.0.1
## [127] KernSmooth_2.23-20 gridExtra_2.3
## [129] IRanges_2.26.0 parallelly_1.33.0
## [131] codetools_0.2-18 MASS_7.3-54
## [133] assertthat_0.2.1 SummarizedExperiment_1.22.0
## [135] withr_2.4.2 sctransform_0.3.3
## [137] GenomeInfoDbData_1.2.6 S4Vectors_0.30.0
## [139] hms_1.1.0 mgcv_1.8-35
## [141] parallel_4.1.0 grid_4.1.0
## [143] rpart_4.1-15 rmarkdown_2.17
## [145] MatrixGenerics_1.4.0 Rtsne_0.15
## [147] lubridate_1.7.10 Biobase_2.52.0
## [149] shiny_1.6.0